###### Data preparation ##### #---------------------------------------------#

# This script prepares the country-year panel data used by the regression models
# in the paper and the Appendix, as well as by the figures based on these models.

# AAA event data by Dukalskis (2021)
AAAD_events <- read_csv(
  file = "data/_Authoritarian Actions Abroad Database (online appendix).xlsx - Sheet1.csv"
)

# Coding periods, used further below to build the time series and the panel
# structure. No delimiter is passed, so read_delim() has to guess it.
AAAD_codings <- read_delim(file = "data/AAAD_countries_years.csv")

###### Aggregation country-year-target -------------------------------------------

# Count events per country, target type and year.
# `.groups = "drop"` returns a plain (ungrouped) tibble, so no grouping by
# country/target leaks into the later steps.
AAAD_events_cy_t <- AAAD_events %>%
  group_by(country, target, year) %>%
  summarise(events = n(), .groups = "drop") %>%
  # The year variable is coded as "multiple" for seven cases; as.numeric()
  # turns those into NA (with a coercion warning) ...
  mutate(year = as.numeric(year)) %>%
  # ... and such cases are removed here.
  drop_na(year)

# Reshape to one row per country-year with one count column per target type.
# Target types without events in a country-year are NA at this point.
AAAD_events_cy_t_wider <- AAAD_events_cy_t %>%
  pivot_wider(names_from = target, values_from = events) %>%
  clean_names()

###### Aggregation country-year-action -------------------------------------------

# Collapse the detailed action codes into three broader classes (threats,
# attempts, executed actions) and count events per country, class and year.
# Actions outside the three lists get NA and are dropped before counting.
AAAD_events_cy_a <- AAAD_events %>%
  mutate(
    action_rec = case_when(
      action %in% c("family_threatened", "threatened") ~ "threats",
      action %in% c(
        "abduction_attempt",
        "assassination_attempt",
        "extradition_attempt"
      ) ~ "attempts",
      action %in% c(
        "abducted",
        "attacked",
        "arrested/detained",
        "assassinated",
        "extradited"
      ) ~ "executed",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(action_rec)) %>%
  # count() groups, tallies and returns an ungrouped tibble in one step
  count(country, action_rec, year, name = "events") %>%
  # Non-numeric year codes become NA and are removed, as for the target counts
  mutate(year = as.numeric(year)) %>%
  drop_na(year)

# One row per country-year, one count column per action class
AAAD_events_cy_a_wider <- AAAD_events_cy_a %>%
  pivot_wider(names_from = action_rec, values_from = events) %>%
  clean_names()
  
# Attach the target- and action-level event counts to the coding periods.
# No `by` is given, so both joins match on whatever columns the tables share.
AAAD_codings_events <- AAAD_codings %>%
  left_join(AAAD_events_cy_t_wider) %>%
  left_join(AAAD_events_cy_a_wider) %>%
  # Every NA left after the joins (in any column) is set to 0
  replace(is.na(.), 0) %>%
  mutate(
    # Total number of events = sum over the five target-type columns
    all_events = rowSums(select(
      ., activist, citizen, former_government_official, journalist, opposition
    )),
    iso3c = countrycode(country, "country.name", "iso3c"),
    .after = country
  ) %>%
  arrange(iso3c, year) %>%
  group_by(iso3c, country) %>%
  # Year immediately preceding the first coded year of each country
  mutate(first_year = min(year, na.rm = TRUE) - 1) %>%
  # Add one empty (all-NA) row per country ...
  group_modify(function(rows, key) add_row(rows, .before = 0)) %>%
  # ... and date it to that preceding year. Its other columns stay NA.
  mutate(year = ifelse(is.na(year), min(first_year, na.rm = TRUE), year)) %>%
  arrange(iso3c, year) %>%
  ungroup() %>%
  select(-first_year)

##### Add data from WB ####
# All three World Bank series are requested with `mrv = 35` and gap filling
# switched on, then reduced to iso3c, year and the renamed indicator column.
# NOTE(review): per the wbstats docs `mrv` means "most recent values", so the
# covered years presumably depend on when the script is run -- confirm.

# Population, total
pop_data <- wb_data(indicator = "SP.POP.TOTL", mrv = 35, gapfill = TRUE) %>%
  select(iso3c, year = date, pop = SP.POP.TOTL)

# GDP per capita
gdp_data <- wb_data(indicator = "NY.GDP.PCAP.CD", mrv = 35, gapfill = TRUE) %>%
  select(iso3c, year = date, gdp_pc = NY.GDP.PCAP.CD)

# GDP growth
gdp_growth_data <- wb_data(
  indicator = "NY.GDP.MKTP.KD.ZG",
  mrv = 35,
  gapfill = TRUE
) %>%
  select(iso3c, year = date, gdp_growth = NY.GDP.MKTP.KD.ZG)

##### Add military data  #####
# Military expenditure from a downloaded World Bank spreadsheet: keep the
# country code and the year columns x1960..x2021, then reshape to long format
# with one row per country-year.
military_wb <- rio::import("data/API_MS.MIL.XPND.CD_DS2_en_excel_v2_4256264.xls") %>%
  clean_names() %>%
  select(iso3c = country_code, x1960:x2021) %>%
  pivot_longer(
    cols = starts_with("x"),
    names_to = "year",
    names_prefix = "x",                        # strip the "x" added by clean_names()
    names_transform = list(year = as.numeric), # "1960" -> 1960
    values_to = "military"
  )

##### Add V-Dem data #####
# Note that you will get different results if you don't use the same version
# of the vdemdata package that we did.
# Keeps the V-Dem indicators used later on, for years after 1989.
vdem <- vdemdata::vdem %>%
  mutate(
    iso3c = countrycode(country_name, origin = "country.name", destination = "iso3c")
  ) %>%
  select(
    iso3c, year,
    v2x_libdem, v2x_polyarchy, v2csreprss,
    starts_with("v2x_civlib"),
    e_polity2, v2eltype_0, v2eltype_6,
    v2x_ex_hereditary, v2cademmob, v2x_regime,
    v2x_ex_military, v2x_ex_party,
    v2x_clphy, v2x_clpol, v2x_clpriv,
    v2clkill, v2cltort,
    v2mecenefm, v2meharjrn, v2meslfcen,
    v2cldiscm, v2cldiscw,
    v2psparban, v2psbars, v2psoppaut,
    v2cseeorgs,
    v2clslavem, v2clslavef,
    v2clprptym, v2clprptyw,
    v2clfmove, v2cldmovem, v2cldmovew,
    v2clrelig, v2csrlgrep, v2x_rule
  ) %>%
  filter(year > 1989) %>%
  # Missing election-type indicators are set to 0
  mutate(across(c(v2eltype_0, v2eltype_6), ~ replace_na(.x, 0)))

#### Add data from Fariss #####
# Human rights protection scores: years after 1989, keyed by iso3c and year
human_rights <- read_csv(file = "data/HumanRightsProtectionScores_v4.01.csv") %>%
  filter(YEAR > 1989) %>%
  mutate(
    iso3c = countrycode(country_name, origin = "country.name", destination = "iso3c")
  ) %>%
  select(iso3c, year = YEAR, theta_mean, Amnesty, killing_best)

##### Add REIGN data #####
# REIGN data; the Correlates of War numeric code (`ccode`) is converted to iso3c
reign <- read_csv("data/REIGN_2021_4.csv") %>%
  mutate(iso3c = countrycode(ccode, origin = "cown", destination = "iso3c"))

# Collapse to one row per country-year: average political violence and coup
# risk, and the longest leader tenure (in months) observed in that year.
# `.groups = "drop"` returns an ungrouped tibble; previously the result stayed
# grouped by iso3c.
# NOTE(review): max(..., na.rm = TRUE) yields -Inf (with a warning) for a
# country-year whose tenure_months are all missing -- confirm none exist.
reign_cy <- reign %>%
  group_by(iso3c, year) %>%
  summarise(
    political_violence = mean(political_violence, na.rm = TRUE),
    coup_risk = mean(couprisk, na.rm = TRUE),
    tenure = max(tenure_months, na.rm = TRUE),
    .groups = "drop"
  )

##### Add state capacity data #####

# The state capacity data come as a Stata .dta file
State_capacity_Hanson <- read_dta("data/StateCapacityDataset_v1.dta")

# Years after 1989 with a non-missing country code. The dataset's own `iso3`
# column is used as iso3c; the alternative conversion
#   countrycode(country, "country.name", "iso3c")
# is left disabled.
State_cap_han <- State_capacity_Hanson %>%
  filter(year > 1989, !is.na(iso3)) %>%
  select(iso3c = iso3, year, Capacity, Capacity_sd)
 
 
 ##### Add Diplometrics data #####
 diplo <- readxl::read_xlsx(
   path = "data/Diplometrics Diplomatic Representation 1960-2020_20211215.xlsx"
 ) %>%
   clean_names()

 # Aggregate at the sending country-year level:
 #   diplo_rep    = number of rows (diplomatic ties) of the sending country
 #   diplo_intens = sum of `lor` over those rows
 diplo_agg <- diplo %>%
   group_by(sending_country, year) %>%
   summarise(
     diplo_rep = n(),
     diplo_intens = sum(lor),
     .groups = "drop"
   ) %>%
   mutate(
     iso3c = countrycode(sending_country, origin = "country.name", destination = "iso3c")
   ) %>%
   select(-sending_country)
   
 
#### Merge all data #####
# Start from the event panel and attach every covariate source by country
# code and year.
data_final <- AAAD_codings_events %>%
  left_join(pop_data, by = c("year", "iso3c")) %>%
  left_join(gdp_data, by = c("year", "iso3c")) %>%
  left_join(gdp_growth_data, by = c("year", "iso3c")) %>%
  left_join(military_wb, by = c("year", "iso3c")) %>%
  left_join(vdem, by = c("year", "iso3c")) %>%
  left_join(human_rights, by = c("year", "iso3c")) %>%
  left_join(reign_cy, by = c("year", "iso3c")) %>%
  left_join(State_cap_han, by = c("year", "iso3c")) %>%
  left_join(diplo_agg, by = c("year", "iso3c")) %>%
  # If a join duplicated a country-year, only its first row is kept
  distinct(iso3c, year, .keep_all = TRUE) %>%
  arrange(iso3c, year) %>%
  # Copy of diplo_rep in which missing values are counted as 0
  mutate(visit_noNA = ifelse(is.na(diplo_rep), 0, diplo_rep)) %>%
  group_by(iso3c)
 
 
 # Create lagged variables and recode some variables.
 # All lags are taken within country (rows sorted by year inside each iso3c
 # group) and shift by one row, so they equal "previous year" only if a
 # country's years are consecutive.
 data_final <- data_final %>%
   arrange(iso3c, year) %>%
   group_by(iso3c) %>%
   # One-row lag of every column from `pop` through `visit_noNA`
   mutate(across(pop:visit_noNA, ~ dplyr::lag(.x), .names = "{.col}_lag")) %>%
   mutate(
     # Inverted civil liberties index and its first differences
     v2x_civlib_inv = 1 - v2x_civlib,
     civlib_inv_diff1 = v2x_civlib_inv - dplyr::lag(v2x_civlib_inv),
     civlib_inv_diff2 = dplyr::lag(v2x_civlib_inv) - dplyr::lag(v2x_civlib_inv, 2),
     # Election dummy: 1 if either election-type indicator equals 1
     elections = ifelse(v2eltype_0 == 1 | v2eltype_6 == 1, 1, 0),
     # Lagged while still grouped by country, so a country's first year gets
     # NA instead of the last value of the preceding country
     elections_lag = dplyr::lag(elections)
   ) %>%
   ungroup() %>%
   mutate(
     iso3c = factor(iso3c),
     year = factor(year),
     any_event = ifelse(all_events > 0, 1, 0),
     v2x_civlib_lag_inv = 1 - v2x_civlib_lag
   )
 
 # Estimation sample for the fixed-effects models: drop rows without an event
 # indicator, drop countries that never record an event (no within-country
 # variation in the outcome), and keep only the variables that are used.
 data_final_fe <- data_final %>%
   # Rescaled (x10) versions of two lagged regressors
   mutate(
     v2x_civlib_lag_inv10 = v2x_civlib_lag_inv * 10,
     v2x_ex_hereditary_lag10 = v2x_ex_hereditary_lag * 10
   ) %>%
   drop_na(any_event) %>%
   group_by(iso3c) %>%
   # Keep a country only if it has at least one event overall
   filter(sum(all_events, na.rm = TRUE) > 0) %>%
   ungroup() %>%
   select(
     # identifiers
     iso3c, year,
     # main regressors and controls
     v2x_civlib_lag_inv, v2x_civlib_lag_inv10, civlib_inv_diff2,
     e_polity2_lag, elections, tenure_lag,
     v2x_ex_hereditary_lag10, v2x_ex_military_lag, v2x_ex_party_lag,
     pop_lag, gdp_pc_lag, Capacity_lag,
     # outcomes
     any_event, all_events,
     # individual V-Dem indicators
     v2csreprss, v2csrlgrep,
     v2mecenefm, v2meharjrn, v2meslfcen,
     v2psbars, v2psparban,
     v2clkill, v2cltort,
     v2cldiscm, v2cldiscw,
     v2psoppaut, v2cseeorgs,
     v2clslavem, v2clslavef,
     v2clprptym, v2clprptyw,
     v2clfmove, v2cldmovem, v2cldmovew,
     v2clrelig,
     # event counts by target type
     citizen, activist, former_government_official, journalist, opposition,
     # event counts by action class
     attempts, threats, executed,
     # remaining lagged covariates
     diplo_rep_lag, diplo_intens_lag, visit_noNA_lag, theta_mean_lag
   ) %>%
   as.data.frame()

 # Store the estimation sample on disk
 save(data_final_fe, file = "data/data_final.Rda")
